# Import the city data set. The file is semicolon-delimited, uses a decimal
# comma, and its first row holds the column names.
podatki <- read.table(
  file = "/cloud/project/Poglavje 1/Naloga 1/Mesta.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)

# Inspect the structure of the imported data frame (recorded output below).
str(podatki)
## 'data.frame':    43 obs. of  4 variables:
##  $ Mesto  : chr  "Amsterdam" "Atene" "Barcelona" "Beograd" ...
##  $ Delo   : num  30.4 41.7 37.5 42.6 34.8 40.1 37.2 39.5 39.8 36.5 ...
##  $ Cene   : int  139 99 109 70 120 85 130 75 71 142 ...
##  $ Dohodek: int  148 75 103 65 158 103 148 94 86 129 ...

# Opis spremenljivk:

# Keep only the three numeric indicators as input for the PCA; the city label
# (`Mesto`) is an identifier, not an analysis variable.
# The columns are selected by name instead of dropping position 1, so that
# re-running this line after `podatki` has gained extra columns (the component
# scores `GK1` are appended to it later in this script) cannot feed those
# columns back into the analysis.
podatki_MGK <- podatki[, c("Delo", "Cene", "Dohodek")]

# Descriptive statistics for every indicator, rounded to two decimals.
# `basic = FALSE` leaves out the basic block of stat.desc(), so only the
# descriptive block (median ... coef.var) appears in the output below.
library(pastecs)
round(
  stat.desc(x = podatki_MGK, basic = FALSE),
  digits = 2
)
##               Delo    Cene Dohodek
## median       37.40  125.00  115.00
## mean         37.55  120.88  120.58
## SE.mean       0.38    5.60    5.65
## CI.mean.0.95  0.77   11.30   11.41
## var           6.20 1348.58 1374.34
## std.dev       2.49   36.72   37.07
## coef.var      0.07    0.30    0.31
# Pearson correlation matrix of the three indicators; it is reused by the
# adequacy checks that follow. Displayed with three decimals.
R <- cor(x = podatki_MGK, method = "pearson")
round(R, digits = 3)
##           Delo   Cene Dohodek
## Delo     1.000 -0.709  -0.761
## Cene    -0.709  1.000   0.750
## Dohodek -0.761  0.750   1.000
# Bartlett's test that the correlation matrix is an identity matrix.
# The sample size passed as `n` is the number of rows (cities) in the data.
library(psych)
cortest.bartlett(
  R = R,
  n = nrow(podatki)
)
## $chisq
## [1] 72.38292
## 
## $p.value
## [1] 1.317917e-15
## 
## $df
## [1] 3
# Kaiser-Meyer-Olkin adequacy measure computed from the correlation matrix:
# one overall MSA value plus one MSA per variable.
library(psych)
KMO(r = R)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA =  0.75
## MSA for each item = 
##    Delo    Cene Dohodek 
##    0.76    0.77    0.72
# First PCA fit, used to decide how many components to retain.
# `scale.unit = TRUE` scales the variables to unit variance before the
# analysis; `graph = FALSE` suppresses the default plots.
library(FactoMineR)
mgk <- PCA(X = podatki_MGK, scale.unit = TRUE, graph = FALSE)


# Eigenvalue of each component together with its share of variance and the
# cumulative share (both in percent).
library(factoextra)
get_eigenvalue(X = mgk)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  2.4803702        82.679006                    82.67901
## Dim.2  0.2914122         9.713740                    92.39275
## Dim.3  0.2282176         7.607255                   100.00000
# Scree plot: eigenvalues (rather than percentages of variance) per component,
# with the value printed on each bar.
library(factoextra)
fviz_eig(
  X = mgk,
  choice = "eigenvalue",
  addlabels = TRUE,
  main = "Diagram lastnih vrednosti",
  xlab = "Glavna komponenta",
  ylab = "Lastna vrednost"
)

# Parallel analysis as a second criterion for the number of components.
# `fa = "pc"` requests principal components only, which is why the number of
# factors is reported as NA in the output below.
# `sim = FALSE`: per the psych documentation this compares against resampled
# data only, without additionally simulating random normal data.
library(psych)
fa.parallel(x = podatki_MGK, fa = "pc", sim = FALSE)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  1
# Final PCA fit keeping a single component (`ncp = 1`), again on variables
# scaled to unit variance and without the default plots.
library(FactoMineR)
mgk <- PCA(X = podatki_MGK, scale.unit = TRUE, ncp = 1, graph = FALSE)

# Correlations between the original variables and the retained component.
print(mgk[["var"]][["cor"]])
##       Delo       Cene    Dohodek 
##  0.9055009 -0.9005352 -0.9216695

# Contribution of each variable to the retained component (in percent).
print(mgk[["var"]][["contrib"]])
##     Delo     Cene  Dohodek 
## 33.05684 32.69527 34.24790
# Store each city's coordinate on the first component next to the raw data.
podatki[["GK1"]] <- mgk$ind$coord[, 1]

# Preview the first three rows, now including the new GK1 column.
head(podatki, n = 3)
##       Mesto Delo Cene Dohodek        GK1
## 1 Amsterdam 30.4  139     148 -2.3937464
## 2     Atene 41.7   99      75  2.0419237
## 3 Barcelona 37.5  109     103  0.4560978
# Bar chart of the first-component score (GK1) for every city.
# geom_col() draws bars whose heights are the data values themselves; the
# city labels on the x axis are rotated by 90 degrees so they do not overlap.
library(ggplot2)
ggplot(data = podatki, mapping = aes(x = Mesto, y = GK1)) +
  geom_col() +
  theme_linedraw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )